#####################################
# Author: Trevor Osaki              #
# Purpose: Survey Data Percentage   #
# Date: December 14, 2020           #
#####################################

# install.packages("haven") 
library(haven)  

# Switch to the replication folder so the relative paths used below resolve.
setwd("/Users/leiyue/Desktop/Projects/WhenIsDicrimUnfair/Replication_Final")

# Load the MTurk survey responses that the sample shares are computed from.
survey_data <- read.csv(
  file = "1_Dataset Construction/jj_Materials for Weights/mturk_data_for_weights.csv"
)

# Codes that define the demographic cells. Each vector lists the values that
# are matched against the survey column of the same name (gender2, race2,
# edu2, age2).
num_gender <- seq(1, 2)
num_race <- seq(1, 2)
num_edu <- seq(1, 2)
num_age <- seq(1, 3)

# Shares are expressed relative to ALL survey rows. Rows with a missing or
# unlisted code in any of the four columns belong to no cell, so in that case
# the shares sum to less than 1.
n_respondents <- nrow(survey_data)
stopifnot(n_respondents > 0)  # avoid writing 0/0 = NaN shares

# One row per age x edu x race x gender combination. expand.grid() varies its
# first argument fastest, so gender changes fastest and age slowest.
cells <- expand.grid(
  gender2 = num_gender,
  race2 = num_race,
  edu2 = num_edu,
  age2 = num_age
)

# Pull the columns once instead of re-extracting them for every cell.
age_col <- survey_data$age2
edu_col <- survey_data$edu2
race_col <- survey_data$race2
gender_col <- survey_data$gender2

# Count the respondents in each cell. sum(mask, na.rm = TRUE) is used instead
# of nrow(survey_data[mask, ]) because subsetting a data frame with a logical
# index that contains NA returns an extra all-NA row per NA, which would be
# counted as a respondent of the cell.
cell_counts <- vapply(
  seq_len(nrow(cells)),
  function(idx) {
    in_cell <- age_col == cells$age2[idx] &
      edu_col == cells$edu2[idx] &
      race_col == cells$race2[idx] &
      gender_col == cells$gender2[idx]
    sum(in_cell, na.rm = TRUE)
  },
  numeric(1)
)

# Result matrix, one row per cell: share, age, edu, race, gender.
# Kept as an unnamed numeric matrix so data.frame(shares) yields X1..X5.
shares <- unname(cbind(
  cell_counts / n_respondents,
  cells$age2,
  cells$edu2,
  cells$race2,
  cells$gender2
))


# Sanity check: total share covered by all cells (auto-printed at top level).
sum(shares[, 1L])

# Turn the matrix into a data frame and label its columns in the order they
# were filled: the share first, then the four demographic codes.
shares <- data.frame(shares)
names(shares) <- c("sample_share", "age2", "edu2", "race2", "gender2")

# Save the cell shares as a Stata dataset.
write_dta(
  data = shares,
  path = "1_Dataset Construction/ff_sample_weights.dta"
)
